Dependencies

library(tidyverse)
library(knitr)
library(kableExtra)
library(janitor)
library(scales)
library(ggExtra)
library(ggrepel)
remotes::install_github("hughjonesd/ggmagnify")
library(ggmagnify)
library(ggplot2)
library(devtools)
devtools::install_github("psyteachr/introdataviz")
library(introdataviz)
library(survival)
library(lattice)
library(Hmisc)
library(report)
devtools::install_github("thomasp85/patchwork")
library(patchwork)
library(psych)

Data

Load the processed data and apply the global exclusions.

data_processed <- read_csv("../data/processed/data_processed.csv")

data_processed_after_exclusions <- data_processed |>
  filter(exclude_participant == "include")

Sample descriptives

Sample size before exclusions

data_processed |>
  count(name = "n") |>
  kable() |>
  add_header_above(header = c("Whole sample" = 1)) |> # note that you can add header rows to tables like this. The "1" indicates the number of columns the header should span. The sum of these numbers must equal the number of columns or you'll get an error.
  kable_classic(full_width = FALSE)
Whole sample
n
102

Sample size after exclusions

Sample used in subsequent analyses

data_processed_after_exclusions |>
  count(name = "n") |>
  kable() |>
  add_header_above(header = c("For analysis" = 1)) |>
  kable_classic(full_width = FALSE)
For analysis
n
90

Age

data_processed_after_exclusions |>
  mutate(age = as.numeric(age)) |>
  summarise(Mean = mean(age, na.rm = TRUE),
            SD = sd(age, na.rm = TRUE)) |>
  mutate_all(.funs = janitor::round_half_up, digits = 1) |>
  kable() |>
  add_header_above(header = c("Age" = 2)) |>
  kable_classic(full_width = FALSE)
Age
Mean SD
36.8 12.4

Gender

data_processed_after_exclusions |> 
  rename(Gender = gender) |>
  group_by(Gender) |> 
  summarise(n = n()) |> 
  mutate(Percent = paste0(round_half_up((n / sum(n)) * 100, 1), "%")) |>
  mutate(Gender = stringr::str_to_sentence(Gender)) |> # Change the case of the Gender variable so that it prints nicely
  kable() |>
  kable_classic(full_width = FALSE)
Gender n Percent
Female 39 43.3%
Male 48 53.3%
Nonbinary 3 3.3%

Descriptives

Descriptive statistics and plots of the measures (excluding the demographics variables)

Self-reported evaluations

Descriptive stats

# overall self-reported evaluations
dat_mean_ratings <- data_processed_after_exclusions |>
  summarise(Mean = mean(mean_evaluation, na.rm = TRUE),
            SD = sd(mean_evaluation, na.rm = TRUE),
            n = n()) |>
  mutate(group = "Full sample")

# self-reported evaluations by gender category
dat_mean_ratings_by_gender <- data_processed_after_exclusions |>
  group_by(group = gender) |>
  summarise(Mean = mean(mean_evaluation, na.rm = TRUE),
            SD = sd(mean_evaluation, na.rm = TRUE),
            n = n())

# combine both into one table
bind_rows(dat_mean_ratings,
          dat_mean_ratings_by_gender) |>
  select(Subset = group, Mean, SD, n) |> # select variables of interest, and rename one 
  mutate(Subset = stringr::str_to_sentence(Subset)) |> # Change the case of the Subset variable so that it prints nicely
  mutate_if(is.numeric, round_half_up, digits = 2) |>
  kable() |>
  add_header_above(header = c(" " = 1, "Self-reported evaluations" = 3)) |>
  kable_classic(full_width = FALSE)
Self-reported evaluations
Subset Mean SD n
Full sample 1.64 1.02 90
Female 1.32 0.71 39
Male 1.81 1.03 48
Nonbinary 3.33 2.19 3

Descriptive plot

ggplot(data_processed_after_exclusions, aes(x = mean_evaluation)) +
  geom_histogram(binwidth = 1,
                 boundary = 0,
                 fill = viridis_pal(begin = 0.45, option = "mako")(1), 
                 color = viridis_pal(begin = 0.30, option = "mako")(1)) + 
  xlab("Mean self-reported evaluation") +
  ylab("Frequency") +
  theme_linedraw() +
  scale_x_continuous(breaks = pretty_breaks(n = 7)) +
  coord_cartesian(xlim = c(1, 7)) +
  theme(panel.grid.minor = element_blank())

AMP evaluations

Descriptive stats

add table of means, SDs, Ns.

# overall AMP score
dat_AMP_score <- data_processed_after_exclusions |>
  summarise(Mean = mean(AMP_score, na.rm = TRUE),
            SD = sd(AMP_score, na.rm = TRUE),
            n = n()) |>
  mutate(group = "Full sample")

# AMP score by gender category
dat_AMP_score_by_gender <- data_processed_after_exclusions |>
  group_by(group = gender) |>
  summarise(Mean = mean(AMP_score, na.rm = TRUE),
            SD = sd(AMP_score, na.rm = TRUE),
            n = n())

# combine both into one table
bind_rows(dat_AMP_score,
          dat_AMP_score_by_gender) |>
  select(Subset = group, Mean, SD, n) |> # select variables of interest, and rename one 
  mutate(Subset = stringr::str_to_sentence(Subset)) |> # Change the case of the Subset variable so that it prints nicely
  mutate_if(is.numeric, round_half_up, digits = 2) |>
  kable() |>
  add_header_above(header = c(" " = 1, "AMP score" = 3)) |>
  kable_classic(full_width = FALSE)
AMP score
Subset Mean SD n
Full sample 0.58 0.19 90
Female 0.58 0.21 39
Male 0.58 0.18 48
Nonbinary 0.55 0.06 3

Descriptive plots

ggplot(data_processed_after_exclusions, aes(x = AMP_score)) +
  geom_histogram(binwidth = 0.05,
                 boundary = 0,
                 fill = viridis_pal(begin = 0.45, option = "mako")(1), 
                 color = viridis_pal(begin = 0.30, option = "mako")(1)) + 
  xlab("AMP score") +
  ylab("Frequency") +
  theme_linedraw() +
  scale_x_continuous(breaks = pretty_breaks(n = 10))

# Multiple versions


# Binwidth = 0.1, starting point not one
ggplot(data_processed_after_exclusions, aes(AMP_score)) +
  geom_histogram(binwidth = 0.1,
                 fill = "white",
                 colour = "black")

# Binwidth = 0.1, starting point 0
ggplot(data_processed_after_exclusions, aes(AMP_score)) +
  geom_histogram(binwidth = 0.1,
                 fill = "white",
                 colour = "black",
                 boundary = 0.05) +
  xlab("AMP score") +
  ylab("Frequency") +
  theme_linedraw()

# seperate for gender
ggplot(data_processed_after_exclusions, aes(AMP_score, fill = gender)) +
  geom_histogram(binwidth = 0.1,
                 boundary = 0,
                 colour = "black") +
  xlab("AMP score") +
  ylab("Frequency") +
  theme_linedraw()

Analyses & hypothesis tests

Cronbach’s Alpha AMP

Test if the three items of the AMP score have internal validity. Cronbach’s alpha is used to measure the degree of agreement (internal consistency) between several items (like, positive, prefer) in a questionnaire.

alpha(subset(data_processed_after_exclusions, select = c(like, positive, prefer)), check.keys =TRUE)
## 
## Reliability analysis   
## Call: alpha(x = subset(data_processed_after_exclusions, select = c(like, 
##     positive, prefer)), check.keys = TRUE)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean sd median_r
##       0.88      0.88    0.84      0.71 7.5 0.023  1.6  1     0.68
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.83  0.88  0.92
## Duhachek  0.83  0.88  0.92
## 
##  Reliability if an item is dropped:
##          raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## like          0.81      0.81    0.68      0.68 4.2    0.040    NA  0.68
## positive      0.88      0.88    0.79      0.79 7.7    0.024    NA  0.79
## prefer        0.79      0.80    0.67      0.67 4.0    0.043    NA  0.67
## 
##  Item statistics 
##           n raw.r std.r r.cor r.drop mean  sd
## like     90  0.90  0.91  0.86   0.79  1.6 1.0
## positive 90  0.88  0.87  0.75   0.71  1.7 1.2
## prefer   90  0.91  0.92  0.87   0.80  1.7 1.1
## 
## Non missing response frequency for each item
##             1    2    3    4    5    6    7 miss
## like     0.67 0.18 0.10 0.02 0.02 0.01 0.00    0
## positive 0.68 0.16 0.07 0.07 0.00 0.02 0.01    0
## prefer   0.67 0.12 0.11 0.07 0.03 0.00 0.00    0

Interpretation:

As the three items all have the same value range, the value can be read under “raw_alpa”, which is 0.877. A value of 0.87 is a very good value. The three items show a sufficiently high inter-item correlation.The 95% confidence values, which are shown as 0.83 and 0.92 for lower and upper respectively, can also be important.

Hypothesis 1: Self-reported evaluations are correlated with evaluations on the AMP

Plot

In the first two graphs, a regression line is drawn in which each variable is conceptualized once as an independent variable and once as a dependent variable. However, since a causal relationship is not assumed, but merely a correlative relationship, the third graph shows a scatterplot without a regression line.

# Self-reported evaluation as dependent variable
ggplot(data_processed_after_exclusions, 
       aes(x = AMP_score,
           y = mean_evaluation)) +
  geom_jitter(color = viridis_pal(begin = 0.45, option = "mako")(1),
              alpha = 0.5) +
  geom_smooth(method = "lm",
              color = viridis_pal(begin = 0.45, option = "mako")(1)) +
  xlab("AMP score") +
  ylab("Mean self-reported evaluation") +
  theme_linedraw() 

# AMP score as dependent variable
ggplot(data_processed_after_exclusions, 
       aes(y = AMP_score,
           x = mean_evaluation)) +
  geom_jitter(color = viridis_pal(begin = 0.45, option = "mako")(1),
              alpha = 0.5) +
  geom_smooth(method = "lm",
              color = viridis_pal(begin = 0.45, option = "mako")(1)) +
  ylab("AMP score") +
  xlab("Mean self-reported evaluation") +
  theme_linedraw() 

# Scatterplot
p1 <- ggplot(data_processed_after_exclusions, 
       aes(x = AMP_score,
           y = mean_evaluation)) +
  geom_jitter(color = viridis_pal(begin = 0.45, option = "mako")(1),
              alpha = 0.5) +
  xlab("AMP score") +
  ylab("Mean self-reported evaluation") +
  theme_linedraw() 
p1

More complex plots:

Axial histograms

Scatter plots with axial histograms using ggExtra: https://cran.r-project.org/web/packages/ggExtra/vignettes/ggExtra.html

add axial histograms to a scatter plot. Split both the scatter plot and the histograms by gender.

Axial histograms for all three gender categories (female, male, non-binary)
# Not grouped by gender
Axial_histogram <- ggplot(data_processed_after_exclusions, 
       aes(x = AMP_score,
           y = mean_evaluation)) +
  geom_jitter(color = viridis_pal(begin = 0.45, option = "mako")(1),
              alpha = 0.5) +
  xlab("AMP score") +
  ylab("Mean self-reported evaluation") +
  theme_linedraw() 

ggMarginal(Axial_histogram, type = "histogram")


# Grouped by gender
library(scales)

Axial_histogram_gender <- ggplot(data_processed_after_exclusions,
              aes(x = AMP_score,
                  y = mean_evaluation,
                  colour = gender)) +
  geom_point() +
  geom_smooth(aes(colour = gender), method = "lm", se = FALSE) +
  scale_colour_manual(name = "gender",
                      labels = c("Female", "Male","Non-binary"),
                      values = c("female" = "#ffc1c1", "male" = "#1e90ff", "nonbinary" = "#ffa500")) +
  theme(legend.position = c(.15,.7)) +
  labs(x = "AMP score", y = "Self-reported evaluations")

# density plot
ggMarginal(Axial_histogram_gender, type = "density", groupColour = TRUE, groupFill = TRUE)
# histogram plot
ggMarginal(Axial_histogram_gender, type = "histogram", groupColour = TRUE, groupFill = TRUE)

Axial histograms for only female and male
# Filter female and male
data_processed_after_exclusions_H1 <- data_processed_after_exclusions %>%
  filter(gender != "nonbinary")

# Not grouped by gender
Axial_histogram_male_female <- ggplot(data_processed_after_exclusions_H1, 
       aes(x = AMP_score,
           y = mean_evaluation)) +
  geom_jitter(color = viridis_pal(begin = 0.45, option = "mako")(1),
              alpha = 0.5) +
  xlab("AMP score") +
  ylab("Mean self-reported evaluation") +
  theme_linedraw() 

ggMarginal(Axial_histogram_male_female, type = "histogram")


# Grouped by gender
library(scales)

Axial_histogram_gender_male_female <- ggplot(data_processed_after_exclusions_H1,
              aes(x = AMP_score,
                  y = mean_evaluation,
                  colour = gender)) +
  geom_point() +
  geom_smooth(aes(colour = gender), method = "lm", se = FALSE) +
  scale_colour_manual(name = "gender",
                      labels = c("Female", "Male"),
                      values = c("female" = "#ffc1c1", "male" = "#1e90ff")) +
  theme(legend.position = c(.15,.7)) +
  labs(x = "AMP score", y = "Self-reported evaluations")

# density plot
ggMarginal(Axial_histogram_gender_male_female, type = "density", groupColour = TRUE, groupFill = TRUE)
# histogram plot
ggMarginal(Axial_histogram_gender_male_female, type = "histogram", groupColour = TRUE, groupFill = TRUE)

Labelled points

Label points using ggrepel: https://cran.r-project.org/web/packages/ggrepel/vignettes/ggrepel.html

Label the points in a scatter plot using their participant codes.

Plot with labels for all participants

Axial_histogram_labelled <- ggplot(data_processed_after_exclusions, 
       aes(x = AMP_score,
           y = mean_evaluation,
           label = subject)) +
  geom_jitter(color = viridis_pal(begin = 0.45, option = "mako")(1),
              alpha = 0.5) +
  xlab("AMP score") +
  ylab("Mean self-reported evaluation") +
  theme_linedraw() 

labeled_scatter_plot <- Axial_histogram_labelled + geom_text_repel()
Axial_histogram_labelled + geom_text_repel()

As this plot is very messy, I only label the participants with outliers in one or both variables in a second graph. Outliers are defined using the interquartile range.

# AMP_score
Q1_AMP_score <- quantile(data_processed_after_exclusions$AMP_score, 0.25)
Q3_AMP_score <- quantile(data_processed_after_exclusions$AMP_score, 0.75)
IQR_AMP_score <- Q3_AMP_score - Q1_AMP_score

# Define upper and lower bounds
upper_bound_AMP_score <- Q3_AMP_score + 1.5 * IQR_AMP_score
lower_bound_AMP_score <- Q1_AMP_score - 1.5 * IQR_AMP_score

# Identify potential outliers
outliers_AMP_score <- data.frame(data_processed_after_exclusions %>%
  mutate(outliers_AMP_score = ifelse(AMP_score > upper_bound_AMP_score |
                            AMP_score < lower_bound_AMP_score,
                          "outlier", "non outlier")))



# mean_evaluation
Q1_mean_evaluation <- quantile(data_processed_after_exclusions$mean_evaluation, 0.25)
Q3_mean_evaluation <- quantile(data_processed_after_exclusions$mean_evaluation, 0.75)
IQR_mean_evaluation <- Q3_mean_evaluation - Q1_mean_evaluation

# Define upper and lower bounds
upper_bound_mean_evaluation <- Q3_mean_evaluation + 1.5 * IQR_mean_evaluation
lower_bound_mean_evaluation <- Q1_mean_evaluation - 1.5 * IQR_mean_evaluation

# Identify potential outliers
outliers_mean_evaluation <- data.frame(data_processed_after_exclusions %>%
  mutate(outliers_mean_evaluation = ifelse(mean_evaluation > upper_bound_mean_evaluation |
                            mean_evaluation < lower_bound_mean_evaluation,
                          "outlier", "non outlier")))

# combine dataframes by subject
data_processed_outliers <- data_processed_after_exclusions %>%
  full_join(outliers_AMP_score, by = "subject") %>% 
  full_join(outliers_mean_evaluation, by = "subject")

# as character
data_processed_outliers$outliers_AMP_score <- as.character(data_processed_outliers$outliers_AMP_score)
data_processed_outliers$outliers_mean_evaluation<- as.character(data_processed_outliers$outliers_mean_evaluation)


labels <-  data_processed_outliers %>%
  filter(outliers_AMP_score == "outlier" | outliers_mean_evaluation == "outlier")



Axial_histogram_labelled_outliers <- ggplot(data_processed_after_exclusions, 
       aes(x = AMP_score,
           y = mean_evaluation,
           label = subject)) +
  geom_jitter(color = viridis_pal(begin = 0.45, option = "mako")(1),
              alpha = 0.5) +
  xlab("AMP score") +
  ylab("Mean self-reported evaluation") +
  theme_linedraw() 


labeled_scatter_plot_outliers <- Axial_histogram_labelled_outliers + geom_text_repel(data = labels)
Axial_histogram_labelled_outliers + geom_text_repel(data = labels)

Magnify areas

Magnify areas of your plot with ggmagnify: https://hughjonesd.github.io/ggmagnify/

Magnify an area of one of your scatter plots, e.g., where there are a lot of data points in a small area.

from <- c(xmin = 0.45, xmax = 0.68, ymin = 0.85, ymax = 1.31)
# Names xmin, xmax, ymin, ymax are optional:
to <- c(xmin = 0.00, xmax = 0.25, ymin = 2, ymax = 3.3)

Axial_histogram + geom_magnify(from = from, to = to,
                               colour = "black", linewidth = 0.6, proj.linetype = 3)

Test

run an appropriate test. Below the output, interpret the results: write a few sentences that report and interpret

To test if the self-reported evaluations are correlated with evaluations in the AMP score, I calculated a Pearson correlation. The Pearson correlation coefficient (r) is a statistic that determines how closely two variables are related. Its value ranges from -1 to +1, with 0 denoting no linear correlation, -1 denoting a perfect negative linear correlation, and +1 denoting a perfect positive linear correlation. A correlation between variables means that as one variable’s value changes, the other tends to change in the same way. It is important to remember that a correlation is no indicator of causality, as the variables are treated equal!

correlation_H1 <- report(cor.test(data_processed_after_exclusions$mean_evaluation,
                      data_processed_after_exclusions$AMP_score,
                      method = "pearson"))
correlation_H1
## Effect sizes were labelled following Funder's (2019) recommendations.
## 
## The Pearson's product-moment correlation between
## data_processed_after_exclusions$mean_evaluation and
## data_processed_after_exclusions$AMP_score is negative, statistically not
## significant, and small (r = -0.12, 95% CI [-0.32, 0.09], t(88) = -1.17, p =
## 0.244)

Interpretation:

The pearson correlation yields a value of -0.124. As this is a negative sign, it indicates that the two variables correlate negatively with each other. This means that when one variable changes, the other variable changes in the opposite direction. When people report increasing self-reported evaluations, then their evaluation on the AMP tends to decrease. This is against our initial assumptions. The strength of this correlation is to be classified as only weak (0 to -.3) and statistically not significant.

Hypothesis 2: Self-reported evaluations differ between men and women

Plot

split histogram, split violin plot, raincloud plot, etc.

Split histogram plot (for all genders)
# All in one histogram ("stacked")
ggplot(data_processed_after_exclusions, aes(x = mean_evaluation)) +
  geom_histogram(aes(fill = gender),
                 binwidth = 1,
                 boundary = 0,
                 color = "black") + 
  xlab("Mean self-reported evaluation") +
  ylab("Frequency") +
  theme_linedraw() +
  scale_x_continuous(breaks = pretty_breaks(n = 7)) +
  coord_cartesian(xlim = c(1, 7)) +
  theme(panel.grid.minor = element_blank())

# All in one histogram ("dodged")
ggplot(data_processed_after_exclusions, aes(x = mean_evaluation,
                                            group = gender,
                                            fill = gender))+
  geom_histogram(position="dodge",binwidth=0.4) +
  xlab("Mean self-reported evaluation") +
  ylab("Frequency") +
  scale_x_continuous(breaks = pretty_breaks(n = 7)) +
  coord_cartesian(xlim = c(1, 7)) +
  theme_bw()

# Separate histograms next to eachother
ggplot(data_processed_after_exclusions, aes(x = mean_evaluation)) + 
  geom_histogram() + 
  facet_grid(~gender) + 
  xlab("Mean self-reported evaluation") +
  ylab("Frequency") +
  scale_x_continuous(breaks = pretty_breaks(n = 7)) +
  coord_cartesian(xlim = c(1, 7)) +
  theme_bw()

Split histogram plot (only female and male)
# Filter female and male
data_processed_after_exclusions_H2 <- data_processed_after_exclusions %>%
  filter(gender != "nonbinary")

# All in one histogram ("stacked")
ggplot(data_processed_after_exclusions_H2, aes(x = mean_evaluation)) +
  geom_histogram(aes(fill = gender),
                 binwidth = 1,
                 boundary = 0,
                 color = "black") + 
  xlab("Mean self-reported evaluation") +
  ylab("Frequency") +
  theme_linedraw() +
  scale_x_continuous(breaks = pretty_breaks(n = 7)) +
  coord_cartesian(xlim = c(1, 7)) +
  theme(panel.grid.minor = element_blank())

# All in one histogram ("dodged")
ggplot(data_processed_after_exclusions_H2, aes(x = mean_evaluation,
                                            group = gender,
                                            fill = gender))+
  geom_histogram(position="dodge",binwidth=0.4) +
  xlab("Mean self-reported evaluation") +
  ylab("Frequency") +
  scale_x_continuous(breaks = pretty_breaks(n = 7)) +
  coord_cartesian(xlim = c(1, 7)) +
  theme_bw()

# Separate histograms next to eachother
ggplot(data_processed_after_exclusions_H2, aes(x = mean_evaluation)) + 
  geom_histogram() + 
  facet_grid(~gender) + 
  xlab("Mean self-reported evaluation") +
  ylab("Frequency") +
  scale_x_continuous(breaks = pretty_breaks(n = 7)) +
  coord_cartesian(xlim = c(1, 7)) +
  theme_bw()

Split violin plot (seperate for all genders)
ggplot(data_processed_after_exclusions, aes(x = gender, y = mean_evaluation,
                                            fill = gender)) +
  introdataviz::geom_split_violin(alpha = .4, trim = FALSE) +
  geom_boxplot(width = .2, alpha = .6, fatten = NULL, show.legend = FALSE) +
  stat_summary(fun.data = "mean_se", geom = "pointrange", show.legend = F, 
               position = position_dodge(.175)) +
  scale_x_discrete(name = "Gender", labels = c("female", "male", "nonbinary")) +
  scale_y_continuous(name = "Self-reported evaluations",
                     breaks = seq(1, 7, 1), 
                     limits = c(1, 7)) +
  scale_fill_brewer(palette = "Dark2", name = "Gender") +
  theme_minimal()

Split violin plot (only men and women)
p2 <- ggplot(data_processed_after_exclusions_H2, aes(x = "", y = mean_evaluation,
                                            fill = gender)) +
  introdataviz::geom_split_violin(alpha = .4, trim = FALSE) +
  geom_boxplot(width = .2, alpha = .6, fatten = NULL, show.legend = FALSE) +
  stat_summary(fun.data = "mean_se", geom = "pointrange", show.legend = F, 
               position = position_dodge(.175)) +
  scale_y_continuous(name = "Self-reported evaluations",
                     breaks = seq(1, 7, 1), 
                     limits = c(1, 7)) +
  scale_fill_brewer(palette = "Dark2", name = "Gender") +
  xlab("Gender") +
  theme_minimal() +
  scale_fill_manual(values = c("male" = "#1e90ff", "female" = "#ffc1c1"),
                    labels = c("male" = "Male", "female" = "Female"))
p2

Split raincloud plot (only men and women)
rain_height <- .1

ggplot(data_processed_after_exclusions_H2, aes(x = "", y = mean_evaluation,
                                               fill = gender)) +
  # clouds
  introdataviz::geom_flat_violin(trim=FALSE, alpha = 0.4,
    position = position_nudge(x = rain_height+.05)) +
  # rain
  geom_point(aes(colour = gender), size = 2, alpha = .5, show.legend = FALSE, 
              position = position_jitter(width = rain_height, height = 0)) +
  # boxplots
  geom_boxplot(width = rain_height, alpha = 0.4, show.legend = FALSE, 
               outlier.shape = NA,
               position = position_nudge(x = -rain_height*2)) +
  # mean and SE point in the cloud
  stat_summary(fun.data = mean_cl_normal, mapping = aes(color = gender), show.legend = FALSE,
               position = position_nudge(x = rain_height * 3)) +
  # adjust layout
  scale_x_discrete(name = "", expand = c(rain_height*3, 0, 0, 0.7)) +
  scale_y_continuous(name = "mean self-reported evaluation",
                     breaks = seq(1, 7, 1), 
                     limits = c(1, 7)) +
  coord_flip() +
  facet_wrap(~factor(gender, 
                     levels = c("female", "male"), 
                     labels = c("Female", "Male")), 
             nrow = 2) +
  # custom colours and theme
  scale_fill_brewer(palette = "Dark2", name = "Gender") +
  scale_colour_brewer(palette = "Dark2") +
  theme_minimal() +
  theme(panel.grid.major.y = element_blank(),
        legend.position = c(0.8, 0.8),
        legend.background = element_rect(fill = "white", color = "white"))

Test

As a test statistic, I use the unpaired two-samples t-test to compare the mean self-reported evaluation of the two independent groups (men and women). I chose to test two-sided, as there is no indication why one sex should achieve higher values than the other; no directional hypothesis exists.

t.test(data_processed_after_exclusions_H2$mean_evaluation~data_processed_after_exclusions_H2$gender,
       var.equal = TRUE,
       alternative = "two.sided")
## 
##  Two Sample t-test
## 
## data:  data_processed_after_exclusions_H2$mean_evaluation by data_processed_after_exclusions_H2$gender
## t = -2.5221, df = 85, p-value = 0.01353
## alternative hypothesis: true difference in means between group female and group male is not equal to 0
## 95 percent confidence interval:
##  -0.8750556 -0.1035768
## sample estimates:
## mean in group female   mean in group male 
##             1.316239             1.805556

Interpretation:

The t-test consists of the following two hypothesis:

  • Null hypothesis (H0): The means for the two populations are equal.

  • Alternative hypothesis (H1) : The means for the two populations are not equal.

As the p-value (0.014) is less than our specified significance level (0.05), we can reject the null hypothesis. This means that the difference between the two means is statistically significant. The sample provides strong enough evidence to conclude that the two population means are not equal. Men and women differ statistically significant in their mean self-reported evaluation. This is further shown in the fact that the 95% confidence interval does not contain 0. The mean self-reported evaluation of men (1.81) is significantly higher than the mean self-reported evaluation of women (1.32).

Hypothesis 3: Evaluations on the Affect Misattribution Procedure differ between men and women

Plot

split histogram, split violin plot, raincloud plot, etc.

This time, vary the labeling and order of the legend, e.g., capitalise “Men” and “Women”, and know how to change the order of the factors.

Split histogram plot (only men and women)
# Filter female and male
data_processed_after_exclusions_H3 <- data_processed_after_exclusions %>%
  filter(gender != "nonbinary")


# All in one histogram ("stacked")
ggplot(data_processed_after_exclusions_H3, aes(x = AMP_score)) +
  geom_histogram(aes(fill = gender),
                 binwidth = 0.1,
                 boundary = 0,
                 color = "black") + 
  xlab("Mean AMP score") +
  ylab("Frequency") +
  theme_linedraw() +
  scale_x_continuous(breaks = pretty_breaks(n = 10)) +
  coord_cartesian(xlim = c(0, 1)) +
  theme(panel.grid.minor = element_blank()) +
  scale_fill_manual(values = c("male" = "#1e90ff", "female" = "#ffc1c1"),
                    labels = c("male" = "Male", "female" = "Female"))

# All in one histogram ("dodged")
ggplot(data_processed_after_exclusions_H3, aes(x = AMP_score,
                                            group = gender,
                                            fill = gender))+
  geom_histogram(position="dodge",binwidth=0.1) + 
  xlab("Mean AMP score") +
  ylab("Frequency") +
  scale_x_continuous(breaks = pretty_breaks(n = 10)) +
  coord_cartesian(xlim = c(0, 1)) +
  theme_bw() +
  scale_fill_manual(values = c("male" = "#1e90ff", "female" = "#ffc1c1"),
                    labels = c("male" = "Male", "female" = "Female"))

# Separate histograms next to eachother
ggplot(data_processed_after_exclusions_H3, aes(x = AMP_score)) + 
  geom_histogram() + 
  facet_grid(~gender) + 
  xlab("Mean AMP score") +
  ylab("Frequency") +
  scale_x_continuous(breaks = pretty_breaks(n = 10)) +
  coord_cartesian(xlim = c(0, 1)) +
  theme_bw()

Split violin plot (only men and women)
p3 <- ggplot(data_processed_after_exclusions_H3, aes(x = "", y = AMP_score,
                                            fill = gender)) +
  introdataviz::geom_split_violin(alpha = .4, trim = FALSE) +
  geom_boxplot(width = .2, alpha = .6, fatten = NULL, show.legend = FALSE) +
  stat_summary(fun.data = "mean_se", geom = "pointrange", show.legend = F, 
               position = position_dodge(.175)) +
  scale_y_continuous(name = "Mean AMP score",
                     breaks = seq(0, 1, 0.100), 
                     limits = c(0, 1)) +
  xlab("Gender") +
  theme_minimal() +
  scale_fill_manual(values = c("male" = "#1e90ff", "female" = "#ffc1c1"),
                    labels = c("male" = "Male", "female" = "Female"))


p3

Split raincloud plot (only men and women)
rain_height <- .1

ggplot(data_processed_after_exclusions_H3, aes(x = "", y = AMP_score,
                                               fill = gender)) +
  # clouds
  introdataviz::geom_flat_violin(trim=FALSE, alpha = 0.4,
    position = position_nudge(x = rain_height+.05)) +
  # rain
  geom_point(aes(colour = gender), size = 2, alpha = .5, show.legend = FALSE, 
              position = position_jitter(width = rain_height, height = 0)) +
  # boxplots
  geom_boxplot(width = rain_height, alpha = 0.4, show.legend = FALSE, 
               outlier.shape = NA,
               position = position_nudge(x = -rain_height*2)) +
  # mean and SE point in the cloud
  stat_summary(fun.data = mean_cl_normal, mapping = aes(color = gender), show.legend = FALSE,
               position = position_nudge(x = rain_height * 3)) +
  # adjust layout
  scale_x_discrete(name = "", expand = c(rain_height*3, 0, 0, 0.7)) +
  scale_y_continuous(name = "Mean AMP score",
                     breaks = seq(0, 1, 0.1), 
                     limits = c(0, 1)) +
  coord_flip() +
  facet_wrap(~factor(gender, 
                     levels = c("female", "male"), 
                     labels = c("Female", "Male")), 
             nrow = 2) +
  # custom colours and theme
  scale_fill_brewer(palette = "Dark2", name = "Gender") +
  scale_colour_brewer(palette = "Dark2") +
  theme_minimal() +
  theme(panel.grid.major.y = element_blank(),
        legend.position = c(0.1, 0.5),
        legend.background = element_rect(fill = "white", color = "white"))

Test

run an appropriate test. Below the output, print an interpretation of the results generated by the “easystats” package

https://easystats.github.io/report/)

As a test statistic, I use the unpaired two-samples t-test to compare the mean AMP score of the two independent groups (men and women). I chose to test two-sided, as there is no indication why one sex should achieve higher values than the other; no directional hypothesis exists.

t.test(data_processed_after_exclusions_H3$AMP_score~data_processed_after_exclusions_H2$gender,
       var.equal = TRUE,
       alternative = "two.sided")
## 
##  Two Sample t-test
## 
## data:  data_processed_after_exclusions_H3$AMP_score by data_processed_after_exclusions_H2$gender
## t = 0.074348, df = 85, p-value = 0.9409
## alternative hypothesis: true difference in means between group female and group male is not equal to 0
## 95 percent confidence interval:
##  -0.08136332  0.08768454
## sample estimates:
## mean in group female   mean in group male 
##            0.5847578            0.5815972

Interpretation:

The t-test consists of the following two hypothesis:

  • Null hypothesis (H0): The means for the two populations are equal.

  • Alternative hypothesis (H1) : The means for the two populations are not equal.

As the p-value (0.94) is greater than our specified significance level (0.05), we can’t reject the null hypothesis. This means that the difference between the two means is not statistically significant. The sample does not provide strong enough evidence to conclude that the two population means are not equal. Men and women do not differ statistically significant in their mean AMP score. This is further shown in the fact that the 95% confidence interval does contain 0. The mean AMP score of women (0.585) is not significantly higher than the mean AMP score of men (0.582).

report(t.test(data_processed_after_exclusions_H3$AMP_score ~ data_processed_after_exclusions_H3$gender,
       var.equal = TRUE,
       alternative = "two.sided"))
## Effect sizes were labelled following Cohen's (1988) recommendations.
## 
## The Two Sample t-test testing the difference of
## data_processed_after_exclusions_H3$AMP_score by
## data_processed_after_exclusions_H3$gender (mean in group female = 0.58, mean in
## group male = 0.58) suggests that the effect is positive, statistically not
## significant, and very small (difference = 3.16e-03, 95% CI [-0.08, 0.09], t(85)
## = 0.07, p = 0.941; Cohen's d = 0.02, 95% CI [-0.41, 0.44])

Combining plots

Combine plots using patchwork: https://patchwork.data-imaginist.com/

Combine at least three of the above plots into one.

library(patchwork)
combined_AMP <- p1 / (p2 | p3)
combined_AMP

Saving plots

Save plots to disk with ggsave()

Save the above combined plot to disk as both .png and .pdf. Ensure the png has at least 300dpi resolution.

ggsave("combined_AMP.pdf")
ggsave("combined_AMP.png")

Session info

It tells everything about the instances of r that created this work. Useful for recreation of the code and the results. There is reproducability over time. Prints at the end of the HTML file.

sessionInfo()
## R version 4.1.3 (2022-03-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur/Monterey 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] psych_2.2.5             patchwork_1.1.3.9000    report_0.5.7           
##  [4] Hmisc_5.1-0             lattice_0.20-45         survival_3.2-13        
##  [7] introdataviz_0.0.0.9003 devtools_2.4.5          usethis_2.1.6          
## [10] ggmagnify_0.2.0.9000    ggrepel_0.9.4           ggExtra_0.10.1         
## [13] scales_1.2.0            janitor_2.2.0           kableExtra_1.3.4       
## [16] knitr_1.39              forcats_0.5.1           stringr_1.4.0          
## [19] dplyr_1.1.0             purrr_1.0.1             readr_2.1.2            
## [22] tidyr_1.2.0             tibble_3.1.8            ggplot2_3.3.6          
## [25] tidyverse_1.3.2        
## 
## loaded via a namespace (and not attached):
##   [1] googledrive_2.0.0   colorspace_2.0-3    ellipsis_0.3.2     
##   [4] snakecase_0.11.1    htmlTable_2.4.1     parameters_0.21.3  
##   [7] base64enc_0.1-3     fs_1.5.2            rstudioapi_0.13    
##  [10] farver_2.1.0        remotes_2.4.2.1     bit64_4.0.5        
##  [13] fansi_1.0.3         lubridate_1.9.2     xml2_1.3.3         
##  [16] splines_4.1.3       mnormt_2.0.2        cachem_1.0.6       
##  [19] pkgload_1.3.2       Formula_1.2-5       jsonlite_1.8.0     
##  [22] broom_0.8.0         cluster_2.1.2       dbplyr_2.3.0       
##  [25] effectsize_0.8.6    shiny_1.7.4         compiler_4.1.3     
##  [28] httr_1.4.2          backports_1.4.1     assertthat_0.2.1   
##  [31] Matrix_1.5-1        fastmap_1.1.0       gargle_1.3.0       
##  [34] cli_3.6.0           later_1.3.0         htmltools_0.5.4    
##  [37] prettyunits_1.1.1   tools_4.1.3         gtable_0.3.0       
##  [40] glue_1.6.2          Rcpp_1.0.8.3        cellranger_1.1.0   
##  [43] jquerylib_0.1.4     vctrs_0.5.2         nlme_3.1-155       
##  [46] svglite_2.1.0       insight_0.19.7      xfun_0.41          
##  [49] ps_1.7.0            rvest_1.0.2         timechange_0.2.0   
##  [52] mime_0.12           miniUI_0.1.1.1      lifecycle_1.0.3    
##  [55] googlesheets4_1.0.1 vroom_1.5.7         ragg_1.2.5         
##  [58] hms_1.1.1           promises_1.2.0.1    parallel_4.1.3     
##  [61] RColorBrewer_1.1-3  yaml_2.3.5          curl_4.3.2         
##  [64] memoise_2.0.1       gridExtra_2.3       sass_0.4.1         
##  [67] rpart_4.1.16        stringi_1.7.6       bayestestR_0.13.1  
##  [70] highr_0.9           checkmate_2.1.0     pkgbuild_1.4.0     
##  [73] rlang_1.0.6         pkgconfig_2.0.3     systemfonts_1.0.4  
##  [76] evaluate_0.15       labeling_0.4.2      htmlwidgets_1.6.1  
##  [79] bit_4.0.4           tidyselect_1.2.0    processx_3.8.0     
##  [82] plyr_1.8.8          magrittr_2.0.3      R6_2.5.1           
##  [85] generics_0.1.2      profvis_0.3.7       DBI_1.1.2          
##  [88] mgcv_1.8-39         pillar_1.7.0        haven_2.5.1        
##  [91] foreign_0.8-82      withr_2.5.0         datawizard_0.9.0   
##  [94] nnet_7.3-17         modelr_0.1.10       crayon_1.5.1       
##  [97] utf8_1.2.2          tmvnsim_1.0-2       tzdb_0.3.0         
## [100] rmarkdown_2.14      urlchecker_1.0.1    grid_4.1.3         
## [103] readxl_1.4.2        data.table_1.14.2   callr_3.7.3        
## [106] reprex_2.0.2        digest_0.6.31       webshot_0.5.3      
## [109] xtable_1.8-4        httpuv_1.6.9        textshaping_0.3.6  
## [112] munsell_0.5.0       viridisLite_0.4.0   bslib_0.3.1        
## [115] sessioninfo_1.2.2